Problem 1.6

a.)

# Load the air-pollution data set; the first row of the CSV supplies the
# column names.
airpol_path <- "~/GitHub/STA135/Homework/HW1/Air-Pollution Data G.C.Tao.csv"
airpol <- read.csv(file = airpol_path, header = TRUE)
library(ggplot2)
library(ggExtra)
## Warning: package 'ggExtra' was built under R version 4.1.3
library(GGally)
## Warning: package 'GGally' was built under R version 4.1.3
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Pairwise plots: scatterplot matrix of every column of airpol
ggpairs(airpol)

# Marginal plots: scatter of Wind (x axis) against each remaining column,
# with a histogram of each variable drawn along its own axis.
# The variable keeps the name `colnames` (even though it shares its name with
# base::colnames()) so that any later code reading it keeps working.
colnames <- names(airpol)
for (i in colnames[-1]) {
  # aes_string() is deprecated in ggplot2; the .data pronoun looks up the
  # column whose name is held in `i`. The axis titles are set explicitly so
  # they stay the plain column names regardless of the ggplot2 version.
  scatter <- ggplot(airpol, aes(x = .data[["Wind..x1."]], y = .data[[i]])) +
    geom_point(color = "firebrick") +
    labs(x = "Wind..x1.", y = i)
  print(ggMarginal(scatter, type = "histogram", fill = "dodgerblue"))
}

### b.)

# x-bar: sample mean vector, computed over the numeric columns only
is_num <- vapply(airpol, is.numeric, logical(1))
colMeans(airpol[is_num])
##            Wind..x1. Solar.Radiation..x2.               CO.x3. 
##             7.500000            73.857143             4.547619 
##               NO.x4.              N02.x5.               O3.x6. 
##             2.190476            10.047619             9.404762 
##               HC.x7. 
##             3.095238
# Sn: the variance-covariance matrix with divisor n.
# cov() divides by n - 1, so the result is rescaled by (n - 1) / n.
n <- nrow(airpol)
Sn <- cov(airpol) * (n - 1) / n
Sn
##                       Wind..x1. Solar.Radiation..x2.     CO.x3.     NO.x4.
## Wind..x1.             2.4404762           -2.7142857 -0.3690476 -0.4523810
## Solar.Radiation..x2. -2.7142857          293.3605442  3.8163265 -1.3537415
## CO.x3.               -0.3690476            3.8163265  1.4858277  0.6575964
## NO.x4.               -0.4523810           -1.3537415  0.6575964  1.1541950
## N02.x5.              -0.5714286            6.6020408  2.2596372  1.0623583
## O3.x6.               -2.1785714           30.0578231  2.7545351 -0.7913832
## HC.x7.                0.1666667            0.6088435  0.1383220  0.1723356
##                         N02.x5.     O3.x6.    HC.x7.
## Wind..x1.            -0.5714286 -2.1785714 0.1666667
## Solar.Radiation..x2.  6.6020408 30.0578231 0.6088435
## CO.x3.                2.2596372  2.7545351 0.1383220
## NO.x4.                1.0623583 -0.7913832 0.1723356
## N02.x5.              11.0929705  3.0521542 1.0192744
## O3.x6.                3.0521542 30.2409297 0.5804989
## HC.x7.                1.0192744  0.5804989 0.4671202
# R: the sample correlation matrix, rounded to two decimals for display
round(cor(airpol), digits = 2)
##                      Wind..x1. Solar.Radiation..x2. CO.x3. NO.x4. N02.x5.
## Wind..x1.                 1.00                -0.10  -0.19  -0.27   -0.11
## Solar.Radiation..x2.     -0.10                 1.00   0.18  -0.07    0.12
## CO.x3.                   -0.19                 0.18   1.00   0.50    0.56
## NO.x4.                   -0.27                -0.07   0.50   1.00    0.30
## N02.x5.                  -0.11                 0.12   0.56   0.30    1.00
## O3.x6.                   -0.25                 0.32   0.41  -0.13    0.17
## HC.x7.                    0.16                 0.05   0.17   0.23    0.45
##                      O3.x6. HC.x7.
## Wind..x1.             -0.25   0.16
## Solar.Radiation..x2.   0.32   0.05
## CO.x3.                 0.41   0.17
## NO.x4.                -0.13   0.23
## N02.x5.                0.17   0.45
## O3.x6.                 1.00   0.15
## HC.x7.                 0.15   1.00

We can see that none of the variables have a very high correlation. The highest correlation in our data is between carbon monoxide (CO) and nitrogen dioxide (NO2), at 0.557. We can also see that wind is negatively correlated with every other variable except hydrocarbons (HC), with which it has a weak positive correlation of 0.16.

Problem 1.9

#a.)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Eight paired measurements (x1, x2); the data frame is built first and the
# two vectors are then pulled back out for the element-wise work below.
mydata <- data.frame(
  x1 = c(-6, -3, -2, 1, 2, 5, 6, 8),
  x2 = c(-2, -3, 1, -1, 2, 1, 5, 3)
)
x1 <- mydata$x1
x2 <- mydata$x2

# Scatter plot of x2 against x1. The trace type and mode are given explicitly
# so plotly does not have to infer them (which otherwise prints the
# "No trace type specified" / "No scatter mode specified" messages).
# ggplot2 alternative: ggplot(mydata, aes(x = x1, y = x2)) + geom_point()
fig <- plot_ly(mydata, x = ~x1, y = ~x2, type = "scatter", mode = "markers")
fig
# Sn for the eight observations: cov() uses divisor n - 1, so rescale to n
n <- nrow(mydata)
myvarmat <- cov(mydata) * (n - 1) / n
myvarmat
##          x1      x2
## x1 20.48438 9.09375
## x2  9.09375 6.18750
# Extract the individual entries by row/column name
s11 <- myvarmat["x1", "x1"]
s12 <- myvarmat["x2", "x1"]
s22 <- myvarmat["x2", "x2"]
#b.)
# Rotate the coordinate axes through the angle theta whose cosine and sine
# are fixed below (formula on pdf page 56):
#   x1~ =  x1 * cos(theta) + x2 * sin(theta)
#   x2~ = -x1 * sin(theta) + x2 * cos(theta)
cos_theta <- 0.899
sin_theta <- 0.438

x1tilde <- x1 * cos_theta + x2 * sin_theta
x2tilde <- -x1 * sin_theta + x2 * cos_theta

#c
# Sample variances of the rotated coordinates. var() divides by n - 1, so the
# result is rescaled by (n - 1) / n to put it on the same divisor-n scale as
# s11, s12 and s22 from part (a), which come from cov(mydata) * (n - 1) / n.
# Mixing the two divisors is what made the distances in (d) and (e) disagree.
datatilde <- data.frame(x1tilde, x2tilde)
n <- nrow(datatilde)
vartilde <- diag(var(datatilde)) * (n - 1) / n
vartilde
##   x1tilde   x2tilde 
## 24.904073  1.769002
#d
# Rotate the new point P = (4, -2) with the same cos/sin values as in (b)
newx1 <- 4 * 0.899 + -2 * 0.438
newx2 <- -4 * 0.438 + -2 * 0.899

# Statistical distance of the rotated point from the origin, using the
# computed (unrounded) variances from (c) rather than hand-copied values
dOP <- sqrt(
  (newx1^2 / vartilde[["x1tilde"]]) + (newx2^2 / vartilde[["x2tilde"]])
)
dOP
## [1] 2.724179
#e
# Distance from P = (4, -2) to the origin in the original coordinates,
#   d^2 = a11 * x1^2 + 2 * a12 * x1 * x2 + a22 * x2^2,
# with a11, a12, a22 taken from the footnote formulas.
cos_t <- 0.899
sin_t <- 0.438

# The two denominators are shared by all three coefficients
denom1 <- cos_t^2 * s11 + 2 * sin_t * cos_t * s12 + sin_t^2 * s22
denom2 <- cos_t^2 * s22 - 2 * sin_t * cos_t * s12 + sin_t^2 * s11

a11 <- (cos_t^2 / denom1) + (sin_t^2 / denom2)

a22 <- (sin_t^2 / denom1) + (cos_t^2 / denom2)

a12 <- ((sin_t * cos_t) / denom1) - ((sin_t * cos_t) / denom2)

dOP2 <- sqrt((a11 * 16) + (2 * a12 * 4 * -2) + (a22 * 4))
dOP2
## [1] 2.724179

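These two distances should agree to within rounding error, provided the rotated variances in part (c) are computed with the same divisor n as s11, s12, and s22 in part (a); using var() unscaled (divisor n - 1) in part (c) shrinks d(O, P) by a factor of sqrt((n - 1)/n).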

Problem 1.18

# Load the national track records for women. The directory is spelled
# "GitHub" to match the path used when reading the air-pollution data, so
# both reads resolve to the same folder on case-sensitive file systems.
WomenTrack <- read.csv(
  "~/GitHub/STA135/Homework/HW1/National Track Records for Women.csv",
  header = TRUE
)
WomenTrack
##              Country X100m.s. X200m.s. X400m.s. X800m.min. X1500m.min.
## 1          Argentina    11.57    22.94    52.50       2.05        4.25
## 2          Australia    11.12    22.23    48.63       1.98        4.02
## 3            Austria    11.15    22.70    50.62       1.94        4.05
## 4            Belgium    11.14    22.48    51.45       1.97        4.08
## 5            Bermuda    11.46    23.05    53.30       2.07        4.29
## 6             Brazil    11.17    22.60    50.62       1.97        4.17
## 7             Canada    10.98    22.62    49.91       1.97        4.00
## 8              Chile    11.65    23.84    53.68       2.00        4.22
## 9              China    10.79    22.01    49.81       1.93        3.84
## 10          Columbia    11.31    22.92    49.64       2.04        4.34
## 11      Cook Islands    12.52    25.91    61.65       2.28        4.82
## 12        Costa Rica    11.72    23.92    52.57       2.10        4.52
## 13    Czech Republic    11.09    21.97    47.99       1.89        4.03
## 14           Denmark    11.42    23.36    52.92       2.02        4.12
## 15 Domincan Republic    11.63    23.91    53.02       2.09        4.54
## 16           Finland    11.13    22.39    50.14       2.01        4.10
## 17            France    10.73    21.99    48.25       1.94        4.03
## 18           Germany    10.81    21.71    47.60       1.92        3.96
## 19     Great Britian    11.10    22.10    49.43       1.94        3.97
## 20            Greece    10.83    22.67    50.56       2.00        4.09
## 21         Guatemala    11.92    24.50    55.64       2.15        4.48
## 22           Hungary    11.41    23.06    51.50       1.99        4.02
## 23             India    11.56    23.86    55.08       2.10        4.36
## 24         Indonesia    11.38    22.82    51.05       2.00        4.10
## 25           Ireland    11.43    23.02    51.07       2.01        3.98
## 26            Israel    11.45    23.15    52.06       2.07        4.24
## 27             Italy    11.14    22.60    51.31       1.96        3.98
## 28             Japan    11.36    23.33    51.93       2.01        4.16
## 29             Kenya    11.62    23.37    51.56       1.97        3.96
## 30      Korea, South    11.49    23.80    53.67       2.09        4.24
## 31      Korea, North    11.80    25.10    56.23       1.97        4.25
## 32        Luxembourg    11.76    23.96    56.07       2.07        4.35
## 33          Malaysia    11.50    23.37    52.56       2.12        4.39
## 34         Mauritius    11.72    23.83    54.62       2.06        4.33
## 35            Mexico    11.09    23.13    48.89       2.02        4.19
## 36    Myanmar(Burma)    11.66    23.69    52.96       2.03        4.20
## 37       Netherlands    11.08    22.81    51.35       1.93        4.06
## 38       New Zealand    11.32    23.13    51.60       1.97        4.10
## 39            Norway    11.41    23.31    52.45       2.03        4.01
## 40  Papua New Guinea    11.96    24.68    55.18       2.24        4.62
## 41       Philippines    11.28    23.35    54.75       2.12        4.41
## 42            Poland    10.93    22.13    49.28       1.95        3.99
## 43          Portugal    11.30    22.88    51.92       1.98        3.96
## 44           Romania    11.30    22.35    49.88       1.92        3.90
## 45            Russia    10.77    21.87    49.11       1.91        3.87
## 46             Samoa    12.38    25.45    56.32       2.29        5.42
## 47         Singapore    12.13    24.54    55.08       2.12        4.52
## 48             Spain    11.06    22.38    49.67       1.96        4.01
## 49            Sweden    11.16    22.82    51.69       1.99        4.09
## 50       Switzerland    11.34    22.88    51.32       1.98        3.97
## 51            Taiwan    11.22    22.56    52.74       2.08        4.38
## 52          Thailand    11.33    23.30    52.60       2.06        4.38
## 53            Turkey    11.25    22.71    53.15       2.01        3.92
## 54            U.S.A.    10.49    21.34    48.83       1.94        3.95
##    X3000m.min. Marathon
## 1         9.19   150.32
## 2         8.63   143.51
## 3         8.78   154.35
## 4         8.82   143.05
## 5         9.81   174.18
## 6         9.04   147.41
## 7         8.54   148.36
## 8         9.26   152.23
## 9         8.10   139.39
## 10        9.37   155.19
## 11       11.10   212.33
## 12        9.84   164.33
## 13        8.87   145.19
## 14        8.71   149.34
## 15        9.89   166.46
## 16        8.69   148.00
## 17        8.64   148.27
## 18        8.51   141.45
## 19        8.37   135.25
## 20        8.96   153.40
## 21        9.71   171.33
## 22        8.55   148.50
## 23        9.50   154.29
## 24        9.11   158.10
## 25        8.36   142.23
## 26        9.33   156.36
## 27        8.59   143.47
## 28        8.74   139.41
## 29        8.39   138.47
## 30        9.01   146.12
## 31        8.96   145.31
## 32        9.21   149.23
## 33        9.31   169.28
## 34        9.24   167.09
## 35        8.89   144.06
## 36        9.08   158.42
## 37        8.57   143.43
## 38        8.76   146.46
## 39        8.53   141.06
## 40       10.21   221.14
## 41        9.81   165.48
## 42        8.53   144.18
## 43        8.50   143.29
## 44        8.36   142.50
## 45        8.38   141.31
## 46       13.12   191.58
## 47        9.94   154.41
## 48        8.48   146.51
## 49        8.81   150.39
## 50        8.60   145.51
## 51        9.63   159.53
## 52       10.07   162.39
## 53        8.53   151.43
## 54        8.43   141.16
# First, convert the last four columns (positions 5 to 8) to seconds by
# multiplying by 60. Re-running this step would scale them again.
minute_cols <- 5:8
WomenTrack[minute_cols] <- WomenTrack[minute_cols] * 60
WomenTrack
##              Country X100m.s. X200m.s. X400m.s. X800m.min. X1500m.min.
## 1          Argentina    11.57    22.94    52.50      123.0       255.0
## 2          Australia    11.12    22.23    48.63      118.8       241.2
## 3            Austria    11.15    22.70    50.62      116.4       243.0
## 4            Belgium    11.14    22.48    51.45      118.2       244.8
## 5            Bermuda    11.46    23.05    53.30      124.2       257.4
## 6             Brazil    11.17    22.60    50.62      118.2       250.2
## 7             Canada    10.98    22.62    49.91      118.2       240.0
## 8              Chile    11.65    23.84    53.68      120.0       253.2
## 9              China    10.79    22.01    49.81      115.8       230.4
## 10          Columbia    11.31    22.92    49.64      122.4       260.4
## 11      Cook Islands    12.52    25.91    61.65      136.8       289.2
## 12        Costa Rica    11.72    23.92    52.57      126.0       271.2
## 13    Czech Republic    11.09    21.97    47.99      113.4       241.8
## 14           Denmark    11.42    23.36    52.92      121.2       247.2
## 15 Domincan Republic    11.63    23.91    53.02      125.4       272.4
## 16           Finland    11.13    22.39    50.14      120.6       246.0
## 17            France    10.73    21.99    48.25      116.4       241.8
## 18           Germany    10.81    21.71    47.60      115.2       237.6
## 19     Great Britian    11.10    22.10    49.43      116.4       238.2
## 20            Greece    10.83    22.67    50.56      120.0       245.4
## 21         Guatemala    11.92    24.50    55.64      129.0       268.8
## 22           Hungary    11.41    23.06    51.50      119.4       241.2
## 23             India    11.56    23.86    55.08      126.0       261.6
## 24         Indonesia    11.38    22.82    51.05      120.0       246.0
## 25           Ireland    11.43    23.02    51.07      120.6       238.8
## 26            Israel    11.45    23.15    52.06      124.2       254.4
## 27             Italy    11.14    22.60    51.31      117.6       238.8
## 28             Japan    11.36    23.33    51.93      120.6       249.6
## 29             Kenya    11.62    23.37    51.56      118.2       237.6
## 30      Korea, South    11.49    23.80    53.67      125.4       254.4
## 31      Korea, North    11.80    25.10    56.23      118.2       255.0
## 32        Luxembourg    11.76    23.96    56.07      124.2       261.0
## 33          Malaysia    11.50    23.37    52.56      127.2       263.4
## 34         Mauritius    11.72    23.83    54.62      123.6       259.8
## 35            Mexico    11.09    23.13    48.89      121.2       251.4
## 36    Myanmar(Burma)    11.66    23.69    52.96      121.8       252.0
## 37       Netherlands    11.08    22.81    51.35      115.8       243.6
## 38       New Zealand    11.32    23.13    51.60      118.2       246.0
## 39            Norway    11.41    23.31    52.45      121.8       240.6
## 40  Papua New Guinea    11.96    24.68    55.18      134.4       277.2
## 41       Philippines    11.28    23.35    54.75      127.2       264.6
## 42            Poland    10.93    22.13    49.28      117.0       239.4
## 43          Portugal    11.30    22.88    51.92      118.8       237.6
## 44           Romania    11.30    22.35    49.88      115.2       234.0
## 45            Russia    10.77    21.87    49.11      114.6       232.2
## 46             Samoa    12.38    25.45    56.32      137.4       325.2
## 47         Singapore    12.13    24.54    55.08      127.2       271.2
## 48             Spain    11.06    22.38    49.67      117.6       240.6
## 49            Sweden    11.16    22.82    51.69      119.4       245.4
## 50       Switzerland    11.34    22.88    51.32      118.8       238.2
## 51            Taiwan    11.22    22.56    52.74      124.8       262.8
## 52          Thailand    11.33    23.30    52.60      123.6       262.8
## 53            Turkey    11.25    22.71    53.15      120.6       235.2
## 54            U.S.A.    10.49    21.34    48.83      116.4       237.0
##    X3000m.min. Marathon
## 1        551.4   9019.2
## 2        517.8   8610.6
## 3        526.8   9261.0
## 4        529.2   8583.0
## 5        588.6  10450.8
## 6        542.4   8844.6
## 7        512.4   8901.6
## 8        555.6   9133.8
## 9        486.0   8363.4
## 10       562.2   9311.4
## 11       666.0  12739.8
## 12       590.4   9859.8
## 13       532.2   8711.4
## 14       522.6   8960.4
## 15       593.4   9987.6
## 16       521.4   8880.0
## 17       518.4   8896.2
## 18       510.6   8487.0
## 19       502.2   8115.0
## 20       537.6   9204.0
## 21       582.6  10279.8
## 22       513.0   8910.0
## 23       570.0   9257.4
## 24       546.6   9486.0
## 25       501.6   8533.8
## 26       559.8   9381.6
## 27       515.4   8608.2
## 28       524.4   8364.6
## 29       503.4   8308.2
## 30       540.6   8767.2
## 31       537.6   8718.6
## 32       552.6   8953.8
## 33       558.6  10156.8
## 34       554.4  10025.4
## 35       533.4   8643.6
## 36       544.8   9505.2
## 37       514.2   8605.8
## 38       525.6   8787.6
## 39       511.8   8463.6
## 40       612.6  13268.4
## 41       588.6   9928.8
## 42       511.8   8650.8
## 43       510.0   8597.4
## 44       501.6   8550.0
## 45       502.8   8478.6
## 46       787.2  11494.8
## 47       596.4   9264.6
## 48       508.8   8790.6
## 49       528.6   9023.4
## 50       516.0   8730.6
## 51       577.8   9571.8
## 52       604.2   9743.4
## 53       511.8   9085.8
## 54       505.8   8469.6
#Second, divide each column by respective meters to get meters per second